1 Regression Forecasting for Numerical Data

Use the Quality of Life data (Case06_QoL_Symptom_ChronicIllness) to fit several different Multiple Linear Regression models predicting clinically relevant outcomes, e.g., Chronic Disease Score.

# Inspect the current JAVA_HOME setting before the Java-backed packages
# (rJava, RWeka) are loaded; an unset variable is reported as "".
Sys.getenv(x = "JAVA_HOME")
## [1] "C:/Program Files (x86)/Java/jre1.8.0_361"
# Point JAVA_HOME at the JRE this analysis was originally run with, but only
# when that installation is present on the current machine. Overwriting
# JAVA_HOME unconditionally would replace a working setting with a
# nonexistent directory on any other machine and break the rJava load below.
java_home_path <- "C:/Program Files (x86)/Java/jre1.8.0_361"
if (dir.exists(java_home_path)) {
  Sys.setenv(JAVA_HOME = java_home_path)
}
library(rJava)
library(RWeka)
## Warning: package 'RWeka' was built under R version 4.2.3
# Fit an M5P model tree with CHRONICDISEASESCORE as the response and every
# other column of ql_train as a candidate predictor, then print the fit.
ql.m5 <- M5P(formula = CHRONICDISEASESCORE ~ ., data = ql_train)
ql.m5
## M5 pruned model tree:
## (using smoothed linear models)
## 
## CHARLSONSCORE <= 0.5 : 
## |   MSA_Q_01 <= 3.5 : LM1 (418/71.737%)
## |   MSA_Q_01 >  3.5 : LM2 (142/90.248%)
## CHARLSONSCORE >  0.5 : 
## |   MSA_Q_02 <= 3.5 : LM3 (602/88.29%)
## |   MSA_Q_02 >  3.5 : 
## |   |   PH2_Q_02 <= 1.5 : 
## |   |   |   MSA_Q_06 <= 2.5 : LM4 (80/80.003%)
## |   |   |   MSA_Q_06 >  2.5 : LM5 (46/81.21%)
## |   |   PH2_Q_02 >  1.5 : LM6 (203/111.651%)
## 
## LM num: 1
## CHRONICDISEASESCORE = 
##  -0.2061 * LANGUAGE=SPANISH 
##  + 0.1008 * RACE_ETHNICITY=WHITE,AI-ALN,UNKNOWN,HISPANIC 
##  + 0.0045 * RACE_ETHNICITY=UNKNOWN,HISPANIC 
##  + 0.0054 * SEX=FEMALE 
##  + 0.091 * QOL_Q_02 
##  + 0.0033 * QOL_Q_06 
##  + 0.023 * QOL_Q_07 
##  - 0.0014 * QOL_Q_08 
##  - 0.0013 * QOL_Q_09 
##  + 0.0023 * MSA_Q_01 
##  + 0.0011 * MSA_Q_02 
##  - 0.0013 * MSA_Q_05 
##  + 0.0018 * MSA_Q_06 
##  - 0.0505 * MSA_Q_07 
##  - 0.0356 * MSA_Q_09 
##  - 0.0016 * MSA_Q_10 
##  - 0.032 * MSA_Q_14 
##  + 0.0633 * MSA_Q_17 
##  + 0.0007 * PH2_Q_01 
##  - 0.0018 * TOS_Q_02 
##  + 0.0031 * CHARLSONSCORE 
##  + 0.2525
## 
## LM num: 2
## CHRONICDISEASESCORE = 
##  -0.0137 * LANGUAGE=SPANISH 
##  + 0.0083 * RACE_ETHNICITY=WHITE,AI-ALN,UNKNOWN,HISPANIC 
##  + 0.0045 * RACE_ETHNICITY=UNKNOWN,HISPANIC 
##  + 0.2377 * SEX=FEMALE 
##  + 0.0106 * QOL_Q_02 
##  + 0.1524 * QOL_Q_06 
##  + 0.0027 * QOL_Q_07 
##  - 0.0037 * QOL_Q_08 
##  - 0.0013 * QOL_Q_09 
##  + 0.0043 * MSA_Q_01 
##  + 0.0032 * MSA_Q_02 
##  - 0.0035 * MSA_Q_05 
##  + 0.005 * MSA_Q_06 
##  - 0.0029 * MSA_Q_07 
##  - 0.0036 * MSA_Q_09 
##  - 0.0841 * MSA_Q_10 
##  + 0.1118 * PH2_Q_01 
##  - 0.1096 * TOS_Q_01 
##  - 0.1042 * TOS_Q_02 
##  + 0.0031 * CHARLSONSCORE 
##  + 0.4688
## 
## LM num: 3
## CHRONICDISEASESCORE = 
##  0.0023 * RACE_ETHNICITY=WHITE,AI-ALN,UNKNOWN,HISPANIC 
##  + 0.2113 * RACE_ETHNICITY=UNKNOWN,HISPANIC 
##  + 0.0033 * SEX=FEMALE 
##  + 0.0938 * QOL_Q_02 
##  - 0.0425 * QOL_Q_04 
##  + 0.0426 * QOL_Q_06 
##  + 0.0009 * QOL_Q_07 
##  - 0.0647 * QOL_Q_09 
##  + 0.0412 * MSA_Q_01 
##  + 0.0008 * MSA_Q_02 
##  + 0.0004 * PH2_Q_01 
##  + 0.0008 * PH2_Q_02 
##  - 0.0013 * TOS_Q_02 
##  + 0.0019 * CHARLSONSCORE 
##  + 0.7686
## 
## LM num: 4
## CHRONICDISEASESCORE = 
##  0.0041 * RACE_ETHNICITY=WHITE,AI-ALN,UNKNOWN,HISPANIC 
##  + 0.0518 * RACE_ETHNICITY=UNKNOWN,HISPANIC 
##  + 0.028 * SEX=FEMALE 
##  + 0.0035 * QOL_Q_02 
##  - 0.0841 * QOL_Q_04 
##  + 0.0153 * QOL_Q_06 
##  + 0.0073 * QOL_Q_07 
##  + 0.1033 * QOL_Q_08 
##  - 0.0042 * QOL_Q_09 
##  + 0.0027 * MSA_Q_01 
##  + 0.0014 * MSA_Q_02 
##  + 0.0215 * MSA_Q_05 
##  + 0.0147 * MSA_Q_06 
##  - 0.0182 * MSA_Q_11 
##  + 0.008 * MSA_Q_13 
##  + 0.0084 * MSA_Q_14 
##  - 0.0058 * MSA_Q_15 
##  + 0.0004 * PH2_Q_01 
##  + 0.0015 * PH2_Q_02 
##  + 0.0098 * TOS_Q_01 
##  - 0.0108 * TOS_Q_02 
##  + 0.0019 * CHARLSONSCORE 
##  + 0.8506
## 
## LM num: 5
## CHRONICDISEASESCORE = 
##  0.0041 * RACE_ETHNICITY=WHITE,AI-ALN,UNKNOWN,HISPANIC 
##  + 0.0518 * RACE_ETHNICITY=UNKNOWN,HISPANIC 
##  + 0.028 * SEX=FEMALE 
##  + 0.0035 * QOL_Q_02 
##  + 0.0153 * QOL_Q_06 
##  + 0.0073 * QOL_Q_07 
##  - 0.0042 * QOL_Q_09 
##  + 0.0027 * MSA_Q_01 
##  + 0.0014 * MSA_Q_02 
##  - 0.0895 * MSA_Q_03 
##  + 0.3383 * MSA_Q_05 
##  + 0.0229 * MSA_Q_06 
##  - 0.0241 * MSA_Q_11 
##  + 0.008 * MSA_Q_13 
##  + 0.0084 * MSA_Q_14 
##  - 0.0058 * MSA_Q_15 
##  + 0.0004 * PH2_Q_01 
##  + 0.0015 * PH2_Q_02 
##  + 0.0098 * TOS_Q_01 
##  - 0.0108 * TOS_Q_02 
##  + 0.0019 * CHARLSONSCORE 
##  + 0.772
## 
## LM num: 6
## CHRONICDISEASESCORE = 
##  0.0041 * RACE_ETHNICITY=WHITE,AI-ALN,UNKNOWN,HISPANIC 
##  + 0.4092 * RACE_ETHNICITY=UNKNOWN,HISPANIC 
##  + 0.0199 * SEX=FEMALE 
##  + 0.0035 * QOL_Q_02 
##  + 0.2416 * QOL_Q_06 
##  + 0.0052 * QOL_Q_07 
##  - 0.0042 * QOL_Q_09 
##  + 0.086 * MSA_Q_01 
##  + 0.0014 * MSA_Q_02 
##  - 0.0805 * MSA_Q_04 
##  - 0.0822 * MSA_Q_11 
##  + 0.0051 * MSA_Q_13 
##  + 0.0055 * MSA_Q_14 
##  - 0.0037 * MSA_Q_15 
##  + 0.0004 * PH2_Q_01 
##  + 0.0015 * PH2_Q_02 
##  + 0.0063 * TOS_Q_01 
##  - 0.0076 * TOS_Q_02 
##  + 0.0019 * CHARLSONSCORE 
##  + 0.5968
## 
## Number of Rules : 6
# Report Weka's evaluation statistics (correlation, MAE, RMSE, ...) for the
# fitted M5P model.
summary(object = ql.m5)
## 
## === Summary ===
## 
## Correlation coefficient                  0.4914
## Mean absolute error                      0.566 
## Root mean squared error                  0.7252
## Relative absolute error                 94.0868 %
## Root relative squared error             87.1244 %
## Total Number of Instances             1491
#Correlation coefficient                  0.4914
#Mean absolute error                      0.566
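### The M5 model has a higher correlation and a lower MAE than the prediction tree, so it is an improvement over that model. Note that summary() reports these metrics on the training data (1491 instances).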

# Score the rows of ql_test with the fitted M5P model and look at the
# distribution of the predicted values.
ql.p.m5 <- predict(object = ql.m5, newdata = ql_test)
summary(object = ql.p.m5)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.07583 0.69591 1.01151 1.01569 1.23885 2.56245
# Correlation between the M5P predictions and the observed
# CHRONICDISEASESCORE values in ql_test.
cor(x = ql.p.m5, y = ql_test[["CHRONICDISEASESCORE"]])
## [1] 0.2998408
# Correlation between predicted and observed scores on ql_test: 0.2998

# MAE() is defined outside this section; presumably it returns the mean
# absolute error for (observed, predicted) -- verify against its definition.
MAE(ql_test[["CHRONICDISEASESCORE"]], ql.p.m5)
## [1] 0.6413623
# MAE of the M5P predictions on ql_test: 0.641